In [ ]:
import os
import sys
"""Linux"""
os.chdir("/home/optimusprime/Desktop/peeterson/github/DeepAR_demand_prediction/Rental_Fleet_Demand_Forecast")
sys.path.append(os.path.abspath(os.path.join("/home/optimusprime/Desktop/peeterson/github/DeepAR_demand_prediction/Rental_Fleet_Demand_Forecast")))
os.chdir("/home/optimusprime/Desktop/peeterson/github/DeepAR_demand_prediction/Rental_Fleet_Demand_Forecast/libs/linux-gpu")
#from ctypes import FormatError
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import os,sys
import torch
torch.use_deterministic_algorithms(True)
import pytorch_lightning as pl
from pytorch_forecasting.data.encoders import TorchNormalizer
from pytorch_forecasting.metrics import SMAPE, RMSE
from torchmetrics import R2Score, SymmetricMeanAbsolutePercentageError, MeanSquaredError
import matplotlib.pyplot as plt
import pandas as pd
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import NaNLabelEncoder
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
import torch
from pytorch_forecasting.data.encoders import TorchNormalizer
import os,sys
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.stattools import acf,pacf
from scipy.signal import find_peaks
import operator
import statsmodels.api as sm
from itertools import combinations
import pickle
from pytorch_forecasting import Baseline
import random
from pytorch_forecasting import DeepAR,NegativeBinomialDistributionLoss
from itertools import product
from sklearn.metrics import mean_absolute_error, mean_squared_error
import optuna
from optuna.trial import TrialState
import plotly
#from deepar_RegionWise_LinuxGpu_prediction_dev import train_and_forecast
from sklearn.metrics import classification_report
"""
Set Random seed
"""
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)
## additional seeding to ensure reproduciblility.
pl.seed_everything(0)
2024-04-04 10:36:51.834553: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-04-04 10:36:51.835818: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used. 2024-04-04 10:36:51.854166: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-04-04 10:36:51.854186: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-04-04 10:36:51.854811: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2024-04-04 10:36:51.858589: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-04-04 10:36:52.538971: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Global seed set to 0
Out[ ]:
0
In [ ]:
standardised_num_ep = 30
reduced_dataset_size = 0.9
Experiment 6¶
In [ ]:
### Best hyperparams
neu = 210
lay = 1
bat = 32
lr = 0.045984777597412985
drop = 0.0
num_ep = standardised_num_ep
dataset_size =reduced_dataset_size
In [ ]:
### Your code
os.chdir('/home/optimusprime/Desktop/Data_Timothy')
############ Basically df = raw_feat_df <-------------------------
df = pd.read_parquet('EXP_6_combined_dd_ss_raw_feat_data_poi_aggregation.parquet', engine='pyarrow')
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | inflow_lag_4 | inflow_lag_5 | ... | inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_1 | inflow_diff2_per1__count_above_mean | inflow_diff2_per1__count_below_mean | inflow_diff2_per1__autocorrelation__lag_1 | inflow_diff2_per1__autocorrelation__lag_3 | inflow_diff2_per1__partial_autocorrelation__lag_1 | inflow_diff2_per1__skewness | inflow_diff2_per1__autocorrelation__lag_2 | inflow_diff2_per1__kurtosis | inflow_diff2_per1__partial_autocorrelation__lag_2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 169 | 2021-01-08 01:00:00 | school | 66.0 | 101.0 | 87.0 | 422.0 | 448.0 | 305.0 | 90.0 | ... | -60.456708 | 0.0 | 0.0 | -0.081640 | -0.027738 | -0.081622 | -0.496245 | -0.263488 | 0.490017 | -0.303142 |
| 1 | 170 | 2021-01-08 02:00:00 | school | 32.0 | 50.0 | 53.0 | 410.0 | 327.0 | 652.0 | 37.0 | ... | -14.000000 | 1.0 | 1.0 | -1.000000 | -0.027738 | -0.081622 | -0.496245 | -0.263488 | 0.490017 | -0.303142 |
| 2 | 171 | 2021-01-08 03:00:00 | school | 19.0 | 30.0 | 33.0 | 285.0 | 177.0 | 1094.0 | 24.0 | ... | -2.500000 | 1.0 | 2.0 | -0.849256 | -0.027738 | -0.081622 | 0.633716 | 0.198511 | 0.490017 | -0.303142 |
| 3 | 172 | 2021-01-08 04:00:00 | school | 25.0 | 19.0 | 25.0 | 143.0 | 101.0 | 1120.0 | 20.0 | ... | 9.000000 | 2.0 | 2.0 | -0.929158 | -0.711447 | -0.929158 | -0.071375 | 0.749460 | -2.711060 | -0.303142 |
| 4 | 173 | 2021-01-08 05:00:00 | school | 105.0 | 30.0 | 32.0 | 69.0 | 50.0 | 1135.0 | 18.0 | ... | 1.225425 | 3.0 | 2.0 | -0.268487 | -0.207884 | -0.268487 | -0.544485 | -0.049543 | -1.927011 | -0.303142 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 246435 | 24808 | 2023-10-31 16:00:00 | religious | 732.0 | 998.0 | 848.0 | 906.0 | 1070.0 | 678.0 | 1169.0 | ... | -2348.787241 | 13.0 | 12.0 | 0.160769 | -0.059342 | 0.160769 | -0.004889 | -0.165414 | -1.039837 | -0.196335 |
| 246436 | 24809 | 2023-10-31 17:00:00 | religious | 637.0 | 840.0 | 798.0 | 910.0 | 882.0 | 476.0 | 942.0 | ... | -1053.705532 | 14.0 | 11.0 | 0.129439 | -0.185665 | 0.129439 | -0.071656 | -0.180286 | -0.891656 | -0.200398 |
| 246437 | 24810 | 2023-10-31 18:00:00 | religious | 602.0 | 697.0 | 737.0 | 951.0 | 879.0 | 297.0 | 895.0 | ... | -159.359368 | 15.0 | 10.0 | 0.142760 | -0.226929 | 0.142760 | -0.186733 | -0.239184 | -0.900303 | -0.264965 |
| 246438 | 24811 | 2023-10-31 19:00:00 | religious | 452.0 | 665.0 | 678.0 | 998.0 | 942.0 | 124.0 | 933.0 | ... | -141.605817 | 14.0 | 11.0 | 0.139179 | -0.186107 | 0.139179 | -0.059585 | -0.234582 | -1.027312 | -0.258970 |
| 246439 | 24812 | 2023-10-31 20:00:00 | religious | 458.0 | 582.0 | 476.0 | 840.0 | 738.0 | 77.0 | 819.0 | ... | 582.322144 | 14.0 | 11.0 | 0.143353 | -0.139791 | 0.143353 | -0.023768 | -0.199484 | -0.981787 | -0.224651 |
246440 rows × 208 columns
In [ ]:
working_directory = '/home/optimusprime/Desktop/Training_Prediction_4_5_6'
os.chdir(working_directory)
In [ ]:
first_idx = df.loc[df['group'] == 'school', 'time_idx'].iloc[0]
total_time_idx = df[df.group == 'school'].shape[0]
reduced_df_time_idx = [*range(int((1-dataset_size)*total_time_idx), total_time_idx+1, 1)]
df = df[df['time_idx'].isin(reduced_df_time_idx)]
In [ ]:
numeric_cols = list(df.columns)
numeric_cols = [e for e in numeric_cols if e not in ('time_idx', 'datetime', 'group', 'target')]
print(numeric_cols)
['inflow_lag_0', 'inflow_lag_1', 'inflow_lag_2', 'inflow_lag_3', 'inflow_lag_4', 'inflow_lag_5', 'outflow_lag_0', 'outflow_lag_1', 'outflow_lag_2', 'outflow_lag_3', 'outflow_lag_4', 'outflow_lag_5', 'inflow', 'inflow_diff1_per1', 'inflow_diff1_per2', 'inflow_diff2_per1', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_0__skewness', 'inflow_lag_0__kurtosis', 'inflow_lag_0__count_above_mean', 'inflow_lag_0__count_below_mean', 'inflow_lag_0__autocorrelation__lag_1', 'inflow_lag_0__autocorrelation__lag_2', 'inflow_lag_0__autocorrelation__lag_3', 'inflow_lag_0__partial_autocorrelation__lag_1', 'inflow_lag_0__partial_autocorrelation__lag_2', 'inflow_lag_0__partial_autocorrelation__lag_3', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_1__skewness', 'inflow_lag_1__kurtosis', 'inflow_lag_1__count_above_mean', 'inflow_lag_1__count_below_mean', 'inflow_lag_1__autocorrelation__lag_1', 'inflow_lag_1__autocorrelation__lag_2', 'inflow_lag_1__autocorrelation__lag_3', 'inflow_lag_1__partial_autocorrelation__lag_1', 'inflow_lag_1__partial_autocorrelation__lag_2', 'inflow_lag_1__partial_autocorrelation__lag_3', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_2__skewness', 'inflow_lag_2__kurtosis', 'inflow_lag_2__count_above_mean', 'inflow_lag_2__count_below_mean', 'inflow_lag_2__autocorrelation__lag_1', 'inflow_lag_2__autocorrelation__lag_2', 'inflow_lag_2__autocorrelation__lag_3', 'inflow_lag_2__partial_autocorrelation__lag_1', 'inflow_lag_2__partial_autocorrelation__lag_2', 'inflow_lag_2__partial_autocorrelation__lag_3', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_3__skewness', 'inflow_lag_3__kurtosis', 'inflow_lag_3__count_above_mean', 'inflow_lag_3__count_below_mean', 'inflow_lag_3__autocorrelation__lag_3', 'inflow_lag_3__autocorrelation__lag_2', 'inflow_lag_3__partial_autocorrelation__lag_1', 'inflow_lag_3__autocorrelation__lag_1', 'inflow_lag_3__partial_autocorrelation__lag_3', 'inflow_lag_3__partial_autocorrelation__lag_2', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_4__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_4__skewness', 'inflow_lag_4__kurtosis', 'inflow_lag_4__count_above_mean', 'inflow_lag_4__count_below_mean', 'inflow_lag_4__autocorrelation__lag_3', 'inflow_lag_4__autocorrelation__lag_2', 'inflow_lag_4__partial_autocorrelation__lag_1', 'inflow_lag_4__autocorrelation__lag_1', 'inflow_lag_4__partial_autocorrelation__lag_2', 'inflow_lag_4__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_5__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_5__skewness', 'inflow_lag_5__kurtosis', 'inflow_lag_5__count_above_mean', 'inflow_lag_5__count_below_mean', 'inflow_lag_5__autocorrelation__lag_1', 'inflow_lag_5__autocorrelation__lag_2', 'inflow_lag_5__autocorrelation__lag_3', 'inflow_lag_5__partial_autocorrelation__lag_1', 'inflow_lag_5__partial_autocorrelation__lag_2', 'inflow_lag_5__partial_autocorrelation__lag_3', 'inflow_lag_5__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_0__skewness', 'outflow_lag_0__kurtosis', 'outflow_lag_0__count_above_mean', 'outflow_lag_0__count_below_mean', 'outflow_lag_0__autocorrelation__lag_1', 'outflow_lag_0__autocorrelation__lag_2', 'outflow_lag_0__autocorrelation__lag_3', 'outflow_lag_0__partial_autocorrelation__lag_1', 'outflow_lag_0__partial_autocorrelation__lag_2', 'outflow_lag_0__partial_autocorrelation__lag_3', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_1__skewness', 'outflow_lag_1__kurtosis', 'outflow_lag_1__count_above_mean', 'outflow_lag_1__count_below_mean', 'outflow_lag_1__autocorrelation__lag_1', 'outflow_lag_1__autocorrelation__lag_2', 'outflow_lag_1__autocorrelation__lag_3', 'outflow_lag_1__partial_autocorrelation__lag_1', 'outflow_lag_1__partial_autocorrelation__lag_2', 'outflow_lag_1__partial_autocorrelation__lag_3', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_2__skewness', 'outflow_lag_2__kurtosis', 'outflow_lag_2__count_above_mean', 'outflow_lag_2__count_below_mean', 'outflow_lag_2__autocorrelation__lag_1', 'outflow_lag_2__autocorrelation__lag_2', 'outflow_lag_2__autocorrelation__lag_3', 'outflow_lag_2__partial_autocorrelation__lag_1', 'outflow_lag_2__partial_autocorrelation__lag_2', 'outflow_lag_2__partial_autocorrelation__lag_3', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_3__skewness', 'outflow_lag_3__kurtosis', 'outflow_lag_3__count_above_mean', 'outflow_lag_3__count_below_mean', 'outflow_lag_3__autocorrelation__lag_3', 'outflow_lag_3__autocorrelation__lag_2', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_3__partial_autocorrelation__lag_1', 'outflow_lag_3__autocorrelation__lag_1', 'outflow_lag_3__partial_autocorrelation__lag_2', 'outflow_lag_4__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_4__skewness', 'outflow_lag_4__kurtosis', 'outflow_lag_4__count_above_mean', 'outflow_lag_4__count_below_mean', 'outflow_lag_4__autocorrelation__lag_2', 'outflow_lag_4__autocorrelation__lag_3', 'outflow_lag_4__partial_autocorrelation__lag_1', 'outflow_lag_4__autocorrelation__lag_1', 'outflow_lag_4__partial_autocorrelation__lag_2', 'outflow_lag_4__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_4__partial_autocorrelation__lag_3', 'outflow_lag_5__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_5__skewness', 'outflow_lag_5__kurtosis', 'outflow_lag_5__count_above_mean', 'outflow_lag_5__count_below_mean', 'outflow_lag_5__autocorrelation__lag_1', 'outflow_lag_5__autocorrelation__lag_2', 'outflow_lag_5__autocorrelation__lag_3', 'outflow_lag_5__partial_autocorrelation__lag_1', 'outflow_lag_5__partial_autocorrelation__lag_2', 'outflow_lag_5__partial_autocorrelation__lag_3', 'outflow_lag_5__fft_coefficient__attr_"real"__coeff_0', 'inflow__fft_coefficient__attr_"real"__coeff_1', 'inflow__skewness', 'inflow__kurtosis', 'inflow__count_above_mean', 'inflow__count_below_mean', 'inflow__autocorrelation__lag_1', 'inflow__autocorrelation__lag_2', 'inflow__autocorrelation__lag_3', 'inflow__partial_autocorrelation__lag_1', 'inflow__partial_autocorrelation__lag_2', 'inflow__partial_autocorrelation__lag_3', 'inflow__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per1__autocorrelation__lag_3', 'inflow_diff1_per1__partial_autocorrelation__lag_2', 'inflow_diff1_per1__count_above_mean', 'inflow_diff1_per1__count_below_mean', 'inflow_diff1_per1__skewness', 'inflow_diff1_per1__autocorrelation__lag_2', 'inflow_diff1_per1__autocorrelation__lag_1', 'inflow_diff1_per1__partial_autocorrelation__lag_1', 'inflow_diff1_per1__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff1_per1__kurtosis', 'inflow_diff1_per1__partial_autocorrelation__lag_3', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff1_per2__count_above_mean', 'inflow_diff1_per2__count_below_mean', 'inflow_diff1_per2__autocorrelation__lag_3', 'inflow_diff1_per2__skewness', 'inflow_diff1_per2__partial_autocorrelation__lag_2', 'inflow_diff1_per2__autocorrelation__lag_2', 'inflow_diff1_per2__kurtosis', 'inflow_diff1_per2__partial_autocorrelation__lag_1', 'inflow_diff1_per2__autocorrelation__lag_1', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff2_per1__count_above_mean', 'inflow_diff2_per1__count_below_mean', 'inflow_diff2_per1__autocorrelation__lag_1', 'inflow_diff2_per1__autocorrelation__lag_3', 'inflow_diff2_per1__partial_autocorrelation__lag_1', 'inflow_diff2_per1__skewness', 'inflow_diff2_per1__autocorrelation__lag_2', 'inflow_diff2_per1__kurtosis', 'inflow_diff2_per1__partial_autocorrelation__lag_2']
In [ ]:
#################### add date information ts ####################
# create datetime features from datetime column in raw_feat_df
df['_hour_of_day'] = df["datetime"].dt.hour.astype(str)
df['_day_of_week'] = df["datetime"].dt.dayofweek.astype(str)
df['_day_of_month'] = df["datetime"].dt.day.astype(str)
df['_day_of_year'] = df["datetime"].dt.dayofyear.astype(str)
df['_week_of_year'] = df["datetime"].dt.weekofyear.astype(str)
df['_month_of_year'] = df["datetime"].dt.month.astype(str)
df['_year'] = df["datetime"].dt.year.astype(str)
#################### add date information ts ####################
hour_of_day_nunique = df['_hour_of_day'].nunique()
day_of_week_nunique = df['_day_of_week'].nunique()
day_of_month_nunique = df['_day_of_month'].nunique()
day_of_year_nunique = df['_day_of_year'].nunique()
week_of_year_nunique = df['_week_of_year'].nunique()
month_of_year_nunique = df['_month_of_year'].nunique()
year_nunique = df['_year'].nunique()
In [ ]:
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | inflow_lag_4 | inflow_lag_5 | ... | inflow_diff2_per1__autocorrelation__lag_2 | inflow_diff2_per1__kurtosis | inflow_diff2_per1__partial_autocorrelation__lag_2 | _hour_of_day | _day_of_week | _day_of_month | _day_of_year | _week_of_year | _month_of_year | _year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2295 | 2464 | 2021-04-13 16:00:00 | school | 1927.0 | 1988.0 | 2050.0 | 1825.0 | 1771.0 | 490.0 | 2001.0 | ... | -0.105560 | 1.013099 | -0.165698 | 16 | 1 | 13 | 103 | 15 | 4 | 2021 |
| 2296 | 2465 | 2021-04-13 17:00:00 | school | 1968.0 | 2061.0 | 2195.0 | 1909.0 | 2073.0 | 313.0 | 2060.0 | ... | -0.109107 | 1.017013 | -0.168269 | 17 | 1 | 13 | 103 | 15 | 4 | 2021 |
| 2297 | 2466 | 2021-04-13 18:00:00 | school | 1654.0 | 2007.0 | 1897.0 | 1932.0 | 2069.0 | 147.0 | 1997.0 | ... | -0.082354 | 3.896016 | -0.083866 | 18 | 1 | 13 | 103 | 15 | 4 | 2021 |
| 2298 | 2467 | 2021-04-13 19:00:00 | school | 1288.0 | 1586.0 | 1422.0 | 1498.0 | 1988.0 | 46.0 | 1723.0 | ... | -0.018635 | 5.679729 | -0.037163 | 19 | 1 | 13 | 103 | 15 | 4 | 2021 |
| 2299 | 2468 | 2021-04-13 20:00:00 | school | 977.0 | 1252.0 | 943.0 | 1085.0 | 2061.0 | 46.0 | 1288.0 | ... | -0.025986 | 5.739378 | -0.042606 | 20 | 1 | 13 | 103 | 15 | 4 | 2021 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 246267 | 24640 | 2023-10-24 16:00:00 | religious | 699.0 | 1070.0 | 902.0 | 799.0 | 1048.0 | 647.0 | 1097.0 | ... | -0.346793 | 0.422282 | -0.346987 | 16 | 1 | 24 | 297 | 43 | 10 | 2023 |
| 246268 | 24641 | 2023-10-24 17:00:00 | religious | 711.0 | 882.0 | 686.0 | 734.0 | 993.0 | 441.0 | 856.0 | ... | -0.367110 | 0.749651 | -0.383293 | 17 | 1 | 24 | 297 | 43 | 10 | 2023 |
| 246269 | 24642 | 2023-10-24 18:00:00 | religious | 697.0 | 879.0 | 710.0 | 906.0 | 852.0 | 259.0 | 970.0 | ... | -0.373344 | 0.757791 | -0.387397 | 18 | 1 | 24 | 297 | 43 | 10 | 2023 |
| 246270 | 24643 | 2023-10-24 19:00:00 | religious | 740.0 | 942.0 | 647.0 | 1070.0 | 932.0 | 117.0 | 870.0 | ... | -0.408777 | 0.120504 | -0.420055 | 19 | 1 | 24 | 297 | 43 | 10 | 2023 |
| 246271 | 24644 | 2023-10-24 20:00:00 | religious | 335.0 | 738.0 | 441.0 | 882.0 | 709.0 | 47.0 | 783.0 | ... | -0.427382 | 0.153921 | -0.427408 | 20 | 1 | 24 | 297 | 43 | 10 | 2023 |
221810 rows × 215 columns
In [ ]:
print(hour_of_day_nunique)
print(day_of_week_nunique)
print(day_of_month_nunique)
print(day_of_year_nunique)
print(week_of_year_nunique)
print(month_of_year_nunique)
print(year_nunique)
24 7 31 365 52 12 3
In [ ]:
unique_values_group_no_ts_fresh = list(df['group'].unique())
print(len(unique_values_group_no_ts_fresh), "\n")
print(unique_values_group_no_ts_fresh)
10 ['school', 'school_uni', 'school_sec', 'school_pri', 'food', 'food_restaurant', 'food_morning', 'food_bar', 'attractions', 'religious']
In [ ]:
encoder_len = 8 # looks back 8 hours in the past for prediction
pred_len = 1 # 1, 6, 12 # how many timeseteps you want to predict in the future (forecasting), 1 means present (nowcasting)
#cov_lag_len= pred_len
# not using covariates yet, so put as 0
cov_lag_len= 0 #we can use forecasted values, even for inflow
####### train val test split of tsfresh dataframe #########
### you want a the idx for each train, validation and test
### train = 0.7 of each zone (eg. time idx 167 to roughly 20000) eg train = 0.7 * (24812-167)
### validation = 0.2 of each zone (eg. time idx 20000 to 22000)
### test = 0.1 of each zone (eg. time idx 22000 to 24000)
Target = 'target'
total_time_idx = df[df.group == 'school'].shape[0]
first_idx = df.loc[df['group'] == 'school', 'time_idx'].iloc[0]
tr_stop_idx = int(0.7*total_time_idx)
val_start_idx = tr_stop_idx - encoder_len
# val_stop_idx = total_time_idx-1
val_stop_idx = val_start_idx + int(0.2*total_time_idx) # use (tr_stop_idx - encoder_len) as encoder from train set for validatoin
tes_start_idx = val_stop_idx - encoder_len
tes_stop_idx = total_time_idx-1
# tes_start_idx = tr_stop_idx - encoder_len
# tes_stop_idx = total_time_idx-1
print(tr_stop_idx)
print(val_start_idx)
print(val_stop_idx)
print(tes_start_idx)
print(tes_stop_idx)
15526 15518 19954 19946 22180
In [ ]:
train_time_idx = [*range(first_idx, first_idx+tr_stop_idx+1, 1)]
print(train_time_idx[0], train_time_idx[-1])
val_time_idx = [*range(first_idx+val_start_idx, first_idx+val_stop_idx+1, 1)]
print(val_time_idx[0], val_time_idx[-1])
test_time_idx = [*range(first_idx+tes_start_idx, first_idx+tes_stop_idx+1, 1)]
print(test_time_idx[0], test_time_idx[-1])
2464 17990 17982 22418 22410 24644
In [ ]:
"""
set inputs here
(hyperparameters grid search)
"""
######### Network Architecture ###################
# p = 10 # patience no. of epochs
Loss=NegativeBinomialDistributionLoss() # because we assume that the demand follows a negative bionmial distribution
######### Network Architecture ###################
######### Training Routine ###################
fdv_steps = 10 # fast_dev_run -> small set of training to see if training is happening correctly (one of the params in deepar model)
######### Training Routine ###################
# this is for naive forecast, getting past week's demand and predicting purely based on that
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
season_len = 168 # length of season ()
num_past_seas = 6 # number of past seasons to use in averaging (look at 6 week's back, getting average of these 6 values)
#seas_pred_strt_idx = 2035 # seasonal naive forecast start index, in hours use the df dataframe
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
In [ ]:
lr_logger = LearningRateMonitor()
cat_dict = {"_hour_of_day": NaNLabelEncoder(add_nan=True).fit(df._hour_of_day),
"_day_of_week": NaNLabelEncoder(add_nan=True).fit(df._day_of_week), "_day_of_month" : NaNLabelEncoder(add_nan=True).fit(df._day_of_month), "_day_of_year" : NaNLabelEncoder(add_nan=True).fit(df._day_of_year),
"_week_of_year": NaNLabelEncoder(add_nan=True).fit(df._week_of_year), "_month_of_year": NaNLabelEncoder(add_nan=True).fit(df._month_of_year) ,"_year": NaNLabelEncoder(add_nan=True).fit(df._year)}
cat_list = ["_hour_of_day","_day_of_week","_day_of_month","_day_of_year","_week_of_year","_month_of_year","_year"]
embed_size_dict = {'_hour_of_day':(hour_of_day_nunique+1,5),'_day_of_week':(day_of_week_nunique+1,5),'_day_of_month':(day_of_month_nunique+1,5), '_day_of_year':(day_of_year_nunique+1,8), '_week_of_year':(week_of_year_nunique+1,3), '_month_of_year':(month_of_year_nunique+1,3),'_year':(year_nunique+1,1)}
train_dataset = TimeSeriesDataSet(
df[(df['time_idx'].isin(train_time_idx))],
time_idx="time_idx",
target=Target,
categorical_encoders=cat_dict,
group_ids=["group"],
min_encoder_length=encoder_len,
max_encoder_length=encoder_len,
min_prediction_length=pred_len,
max_prediction_length=pred_len,
time_varying_unknown_reals=[Target],
time_varying_known_reals=numeric_cols,
time_varying_known_categoricals=cat_list,
add_relative_time_idx=False,
randomize_length=False,
scalers={},
target_normalizer=TorchNormalizer(method="identity",center=False,transformation=None)
)
val_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[(df['time_idx'].isin(val_time_idx))], stop_randomization=True, predict=False)
# test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx)], stop_randomization=True)
train_dataloader = train_dataset.to_dataloader(train=True, batch_size=bat)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=bat)
# test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
In [ ]:
"""
Machine Learning predictions START
1) DeepAR
"""
trainer = pl.Trainer(
max_epochs=num_ep,
accelerator='gpu',
devices=[0],
auto_lr_find=False,
gradient_clip_val=0.1,
limit_train_batches=1.0,
limit_val_batches=1.0,
#fast_dev_run=fdv_steps,
logger=True,
#log_every_n_steps=10,
# profiler=True,
callbacks=[lr_logger]#, early_stop_callback],
#enable_checkpointing=True,
#default_root_dir="C:\Work\WORK_PACKAGE\Demand_forecasting\github\DeepAR-pytorch\My_model\2_freq_nbinom_LSTM\1_cluster_demand_prediction\logs"
)
#print(f"training routing:\n \n {trainer}")
deepar =DeepAR.from_dataset(
train_dataset,
learning_rate=lr,
hidden_size=neu,
rnn_layers=lay,
dropout=drop,
loss=Loss,
log_interval=20,
log_val_interval=6,
log_gradient_flow=False,
embedding_sizes=embed_size_dict
# reduce_on_plateau_patience=3,
)
#print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")
# print(f"Model :\n \n {deepar}")
torch.set_num_threads(10)
trainer.fit(
deepar,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,
)
########## Prediction #####################
for group in unique_values_group_no_ts_fresh:
test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx) & (df['group'] == group)], stop_randomization=True)
test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
test_output = deepar.predict(data=test_dataloader,mode='prediction',return_index=True,num_workers=8,show_progress_bar=True)
pred_start_idx = int(test_output[1]['time_idx'][0])
actual1_full = np.array([])
pred_full = np.array([])
RMSE_list = np.array([])
WMAPE_list = np.array([])
days = tes_stop_idx - val_stop_idx - pred_len - pred_len
for j in range(0,days,pred_len):
# if j == 0:
# print('pred_start_idx = ',df["datetime"].iloc[pred_start_idx],pred_start_idx )
actual_df = df[df['group'] == group]
prediction_index = [*range(pred_start_idx+(j), pred_start_idx+(j)+pred_len, 1)]
actual = actual_df[actual_df['time_idx'].isin(prediction_index)]['target'].values
actual1_full = np.append(actual1_full, actual)
#plt.plot(actual,'*-')
pred = np.array(np.rint(test_output[0][j])).astype(int)
pred_full = np.append(pred_full, pred)
#plt.plot(pred,'^-')
#plt.show()
absolute_error = np.abs(actual - pred)
absolute_sum = np.abs(actual) + np.abs(pred)
WMAPE = np.mean(absolute_error / (absolute_sum + 1e-8)) * 100
# WMAPE = np.mean(np.abs(actual-pred)/np.abs(actual)+ 1e-8) * 100
RMSE = np.sqrt(mean_squared_error(actual,pred ))
#print('RMSE : ',RMSE)
RMSE_list = np.append(RMSE_list,RMSE)
WMAPE_list = np.append(WMAPE_list,WMAPE)
plt.figure(figsize=(25,5))
plt.plot(actual1_full.flatten(),'^-', label='Actual')
plt.plot(pred_full.flatten(),'*-', label='Predicted')
plt.title('Zone ' + str(group)) # Replace 'XYZ' with your actual group number
plt.legend()
plt.show()
print(f'Average RMSE for {days} days: ',np.mean(RMSE_list))
print('full average RMSE = ',np.sqrt(mean_squared_error(actual1_full.flatten(),pred_full.flatten() )))
print(f'Average WMAPE for {days} days: ',np.mean(WMAPE_list))
wmape_full = np.mean(np.abs(actual1_full.flatten() - pred_full.flatten()) / (np.abs(actual1_full.flatten()) + np.abs(pred_full.flatten()) + 1e-8)) * 100
print('full average WMAPE = ', wmape_full, '\n')
########## Prediction #####################
"""
Machine Learning predictions END
"""
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs `Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used.. `Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used.. Missing logger folder: /home/optimusprime/Desktop/Training_Prediction_4_5_6/lightning_logs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params ---------------------------------------------------------------------------- 0 | loss | NegativeBinomialDistributionLoss | 0 1 | logging_metrics | ModuleList | 0 2 | embeddings | MultiEmbedding | 3.5 K 3 | rnn | LSTM | 375 K 4 | distribution_projector | Linear | 422 ---------------------------------------------------------------------------- 379 K Trainable params 0 Non-trainable params 379 K Total params 1.517 Total estimated model params size (MB)
Epoch 29: 100%|██████████| 6234/6234 [02:32<00:00, 40.87it/s, loss=7.69, v_num=0, train_loss_step=7.470, val_loss=7.870, train_loss_epoch=7.760]
`Trainer.fit` stopped: `max_epochs=30` reached.
Epoch 29: 100%|██████████| 6234/6234 [02:32<00:00, 40.86it/s, loss=7.69, v_num=0, train_loss_step=7.470, val_loss=7.870, train_loss_epoch=7.760]
Predict: 100%|██████████| 70/70 [00:02<00:00, 33.18 batches/s]
Average RMSE for 2224 days: 397.36825539568343 full average RMSE = 495.8892369690298 Average WMAPE for 2224 days: 22.568708922630382 full average WMAPE = 22.568708922630385
Predict: 100%|██████████| 70/70 [00:01<00:00, 35.39 batches/s]
Average RMSE for 2224 days: 818.7297661870504 full average RMSE = 1030.1341929504192 Average WMAPE for 2224 days: 21.74744294500916 full average WMAPE = 21.74744294500916
Predict: 100%|██████████| 70/70 [00:02<00:00, 33.43 batches/s]
Average RMSE for 2224 days: 820.3493705035971 full average RMSE = 1031.9409784496384 Average WMAPE for 2224 days: 21.8365364600095 full average WMAPE = 21.8365364600095
Predict: 100%|██████████| 70/70 [00:02<00:00, 33.77 batches/s]
Average RMSE for 2224 days: 820.6515287769785 full average RMSE = 1031.7183780473813 Average WMAPE for 2224 days: 21.835058511609713 full average WMAPE = 21.835058511609713
Predict: 100%|██████████| 70/70 [00:01<00:00, 35.49 batches/s]
Average RMSE for 2224 days: 810.0247302158274 full average RMSE = 1009.1237595757459 Average WMAPE for 2224 days: 20.920521369447894 full average WMAPE = 20.920521369447894
Predict: 100%|██████████| 70/70 [00:01<00:00, 35.16 batches/s]
Average RMSE for 2224 days: 755.169964028777 full average RMSE = 934.9976488832825 Average WMAPE for 2224 days: 20.74105692446485 full average WMAPE = 20.741056924464846
Predict: 100%|██████████| 70/70 [00:02<00:00, 34.43 batches/s]
Average RMSE for 2224 days: 406.2571942446043 full average RMSE = 499.72058649643935 Average WMAPE for 2224 days: 20.67235366491844 full average WMAPE = 20.672353664918436
Predict: 100%|██████████| 70/70 [00:01<00:00, 35.10 batches/s]
Average RMSE for 2224 days: 435.5031474820144 full average RMSE = 523.1561432973756 Average WMAPE for 2224 days: 18.268170777942256 full average WMAPE = 18.268170777942256
Predict: 100%|██████████| 70/70 [00:02<00:00, 30.82 batches/s]
Average RMSE for 2224 days: 408.9226618705036 full average RMSE = 505.9115544235776 Average WMAPE for 2224 days: 26.651210971997557 full average WMAPE = 26.651210971997553
Predict: 100%|██████████| 70/70 [00:02<00:00, 25.33 batches/s]
Average RMSE for 2224 days: 521.5678956834532 full average RMSE = 614.6724631545094 Average WMAPE for 2224 days: 44.947004498310086 full average WMAPE = 44.947004498310086
Out[ ]:
'\nMachine Learning predictions END\n'
Experiment 5¶
In [ ]:
### Best hyperparams
neu = 210
lay = 3
bat = 32
lr = 0.01644130325721595
drop = 0.6
num_ep = standardised_num_ep
dataset_size = reduced_dataset_size
In [ ]:
### Your code
os.chdir('/home/optimusprime/Desktop/Data_Timothy')
############ Basically df = raw_feat_df <-------------------------
df = pd.read_parquet('EXP_5_combined_dd_ss_raw_feat_data_cluster_aggregation.parquet', engine='pyarrow')
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | outflow_lag_0 | outflow_lag_1 | ... | inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_0 | inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_1 | inflow_diff2_per1__kurtosis | inflow_diff2_per1__autocorrelation__lag_2 | inflow_diff2_per1__autocorrelation__lag_1 | inflow_diff2_per1__partial_autocorrelation__lag_1 | inflow_diff2_per1__partial_autocorrelation__lag_3 | inflow_diff2_per1__autocorrelation__lag_3 | inflow_diff2_per1__skewness | inflow_diff2_per1__count_above_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 167 | 2021-01-07 23:00:00 | 50 | 59.0 | 107.0 | 38.0 | 12.0 | 117.0 | 122.0 | 41.0 | ... | -3.0 | -4.786351 | 0.561537 | -0.128245 | -0.269178 | -0.269166 | -0.110965 | -0.026810 | -0.468564 | 0.0 |
| 1 | 168 | 2021-01-08 00:00:00 | 50 | 32.0 | 67.0 | 14.0 | 30.0 | 158.0 | 59.0 | 23.0 | ... | 4.0 | -10.000000 | 0.561537 | -0.128245 | -1.000000 | -0.269166 | -0.110965 | -0.026810 | -0.468564 | 1.0 |
| 2 | 169 | 2021-01-08 01:00:00 | 50 | 32.0 | 34.0 | 17.0 | 84.0 | 120.0 | 32.0 | 23.0 | ... | 27.0 | -18.000000 | 0.561537 | -1.465116 | -0.017442 | -0.269166 | -0.110965 | -0.026810 | 0.670284 | 1.0 |
| 3 | 170 | 2021-01-08 02:00:00 | 50 | 24.0 | 24.0 | 4.0 | 162.0 | 63.0 | 32.0 | 8.0 | ... | 25.0 | -26.000000 | 0.891341 | -0.741231 | -0.406555 | -0.406555 | -0.110965 | 0.702128 | 1.279001 | 2.0 |
| 4 | 171 | 2021-01-08 03:00:00 | 50 | 9.0 | 12.0 | 12.0 | 243.0 | 24.0 | 24.0 | 27.0 | ... | 31.0 | -15.972136 | 1.663525 | -0.630481 | -0.373850 | -0.373850 | -0.110965 | 0.432843 | 1.254480 | 2.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 247120 | 24808 | 2023-10-31 16:00:00 | 246 | 937.0 | 833.0 | 866.0 | 701.0 | 161.0 | 1060.0 | 1083.0 | ... | 18.0 | -155.248275 | 0.388287 | -0.356099 | 0.135241 | 0.135241 | 0.053212 | -0.073461 | -0.347820 | 12.0 |
| 247121 | 24809 | 2023-10-31 17:00:00 | 246 | 884.0 | 797.0 | 842.0 | 560.0 | 75.0 | 937.0 | 979.0 | ... | -7.0 | -110.884152 | 0.401069 | -0.358260 | 0.141407 | 0.141407 | 0.077524 | -0.060138 | -0.317217 | 12.0 |
| 247122 | 24810 | 2023-10-31 18:00:00 | 246 | 843.0 | 766.0 | 772.0 | 324.0 | 37.0 | 884.0 | 959.0 | ... | 68.0 | 38.090968 | 0.503437 | -0.384094 | 0.142216 | 0.142216 | 0.048863 | -0.096132 | -0.416259 | 13.0 |
| 247123 | 24811 | 2023-10-31 19:00:00 | 246 | 735.0 | 764.0 | 701.0 | 161.0 | 32.0 | 843.0 | 791.0 | ... | -20.0 | 24.437373 | 0.209153 | -0.370867 | 0.126136 | 0.126136 | 0.053884 | -0.069905 | -0.314286 | 13.0 |
| 247124 | 24812 | 2023-10-31 20:00:00 | 246 | 510.0 | 673.0 | 560.0 | 75.0 | 20.0 | 735.0 | 537.0 | ... | -20.0 | 97.248289 | 0.209153 | -0.356388 | 0.124244 | 0.124244 | 0.074379 | -0.045135 | -0.314286 | 13.0 |
247125 rows × 156 columns
In [ ]:
working_directory = '/home/optimusprime/Desktop/Training_Prediction_4_5_6'
os.chdir(working_directory)
In [ ]:
first_idx = df.loc[df['group'] == 141, 'time_idx'].iloc[0]
total_time_idx = df[df.group == 141].shape[0]
reduced_df_time_idx = [*range(int((1-dataset_size)*total_time_idx), total_time_idx+1, 1)]
df = df[df['time_idx'].isin(reduced_df_time_idx)]
In [ ]:
numeric_cols = list(df.columns)
numeric_cols = [e for e in numeric_cols if e not in ('time_idx', 'datetime', 'group', 'target')]
print(numeric_cols)
['inflow_lag_0', 'inflow_lag_1', 'inflow_lag_2', 'inflow_lag_3', 'outflow_lag_0', 'outflow_lag_1', 'outflow_lag_2', 'outflow_lag_3', 'inflow', 'inflow_diff1_per1', 'inflow_diff1_per2', 'inflow_diff2_per1', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_0__skewness', 'inflow_lag_0__count_above_mean', 'inflow_lag_0__autocorrelation__lag_1', 'inflow_lag_0__count_below_mean', 'inflow_lag_0__autocorrelation__lag_2', 'inflow_lag_0__autocorrelation__lag_3', 'inflow_lag_0__partial_autocorrelation__lag_2', 'inflow_lag_0__partial_autocorrelation__lag_1', 'inflow_lag_0__partial_autocorrelation__lag_3', 'inflow_lag_0__kurtosis', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_1__skewness', 'inflow_lag_1__count_above_mean', 'inflow_lag_1__autocorrelation__lag_1', 'inflow_lag_1__count_below_mean', 'inflow_lag_1__autocorrelation__lag_2', 'inflow_lag_1__autocorrelation__lag_3', 'inflow_lag_1__partial_autocorrelation__lag_2', 'inflow_lag_1__partial_autocorrelation__lag_1', 'inflow_lag_1__partial_autocorrelation__lag_3', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_2__skewness', 'inflow_lag_2__count_above_mean', 'inflow_lag_2__autocorrelation__lag_1', 'inflow_lag_2__count_below_mean', 'inflow_lag_2__autocorrelation__lag_2', 'inflow_lag_2__partial_autocorrelation__lag_1', 'inflow_lag_2__partial_autocorrelation__lag_2', 'inflow_lag_2__autocorrelation__lag_3', 'inflow_lag_2__partial_autocorrelation__lag_3', 'inflow_lag_2__kurtosis', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_3__skewness', 'inflow_lag_3__count_above_mean', 'inflow_lag_3__autocorrelation__lag_1', 'inflow_lag_3__count_below_mean', 'inflow_lag_3__autocorrelation__lag_2', 'inflow_lag_3__autocorrelation__lag_3', 'inflow_lag_3__partial_autocorrelation__lag_2', 'inflow_lag_3__partial_autocorrelation__lag_1', 'inflow_lag_3__partial_autocorrelation__lag_3', 'inflow_lag_3__kurtosis', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_0__skewness', 'outflow_lag_0__kurtosis', 'outflow_lag_0__count_above_mean', 'outflow_lag_0__count_below_mean', 'outflow_lag_0__autocorrelation__lag_1', 'outflow_lag_0__autocorrelation__lag_2', 'outflow_lag_0__autocorrelation__lag_3', 'outflow_lag_0__partial_autocorrelation__lag_1', 'outflow_lag_0__partial_autocorrelation__lag_2', 'outflow_lag_0__partial_autocorrelation__lag_3', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_1__skewness', 'outflow_lag_1__kurtosis', 'outflow_lag_1__count_above_mean', 'outflow_lag_1__count_below_mean', 'outflow_lag_1__autocorrelation__lag_1', 'outflow_lag_1__autocorrelation__lag_2', 'outflow_lag_1__autocorrelation__lag_3', 'outflow_lag_1__partial_autocorrelation__lag_1', 'outflow_lag_1__partial_autocorrelation__lag_2', 'outflow_lag_1__partial_autocorrelation__lag_3', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_2__skewness', 'outflow_lag_2__kurtosis', 'outflow_lag_2__count_above_mean', 'outflow_lag_2__count_below_mean', 'outflow_lag_2__autocorrelation__lag_1', 'outflow_lag_2__autocorrelation__lag_2', 'outflow_lag_2__autocorrelation__lag_3', 'outflow_lag_2__partial_autocorrelation__lag_1', 'outflow_lag_2__partial_autocorrelation__lag_2', 'outflow_lag_2__partial_autocorrelation__lag_3', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_3__skewness', 'outflow_lag_3__kurtosis', 'outflow_lag_3__count_above_mean', 'outflow_lag_3__count_below_mean', 'outflow_lag_3__autocorrelation__lag_1', 'outflow_lag_3__autocorrelation__lag_2', 'outflow_lag_3__autocorrelation__lag_3', 'outflow_lag_3__partial_autocorrelation__lag_1', 'outflow_lag_3__partial_autocorrelation__lag_2', 'outflow_lag_3__partial_autocorrelation__lag_3', 'inflow__fft_coefficient__attr_"real"__coeff_0', 'inflow__fft_coefficient__attr_"real"__coeff_1', 'inflow__skewness', 'inflow__count_above_mean', 'inflow__autocorrelation__lag_1', 'inflow__count_below_mean', 'inflow__autocorrelation__lag_2', 'inflow__autocorrelation__lag_3', 'inflow__partial_autocorrelation__lag_2', 'inflow__partial_autocorrelation__lag_1', 'inflow__partial_autocorrelation__lag_3', 'inflow__kurtosis', 'inflow_diff1_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per1__skewness', 'inflow_diff1_per1__kurtosis', 'inflow_diff1_per1__autocorrelation__lag_2', 'inflow_diff1_per1__autocorrelation__lag_1', 'inflow_diff1_per1__partial_autocorrelation__lag_2', 'inflow_diff1_per1__partial_autocorrelation__lag_1', 'inflow_diff1_per1__autocorrelation__lag_3', 'inflow_diff1_per1__partial_autocorrelation__lag_3', 'inflow_diff1_per1__count_below_mean', 'inflow_diff1_per1__count_above_mean', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff1_per2__skewness', 'inflow_diff1_per2__autocorrelation__lag_2', 'inflow_diff1_per2__autocorrelation__lag_1', 'inflow_diff1_per2__autocorrelation__lag_3', 'inflow_diff1_per2__partial_autocorrelation__lag_1', 'inflow_diff1_per2__partial_autocorrelation__lag_2', 'inflow_diff1_per2__count_below_mean', 'inflow_diff1_per2__count_above_mean', 'inflow_diff1_per2__kurtosis', 'inflow_diff1_per2__partial_autocorrelation__lag_3', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff2_per1__kurtosis', 'inflow_diff2_per1__autocorrelation__lag_2', 'inflow_diff2_per1__autocorrelation__lag_1', 'inflow_diff2_per1__partial_autocorrelation__lag_1', 'inflow_diff2_per1__partial_autocorrelation__lag_3', 'inflow_diff2_per1__autocorrelation__lag_3', 'inflow_diff2_per1__skewness', 'inflow_diff2_per1__count_above_mean']
In [ ]:
#################### add date information ts ####################
# create datetime features from datetime column in raw_feat_df
df['_hour_of_day'] = df["datetime"].dt.hour.astype(str)
df['_day_of_week'] = df["datetime"].dt.dayofweek.astype(str)
df['_day_of_month'] = df["datetime"].dt.day.astype(str)
df['_day_of_year'] = df["datetime"].dt.dayofyear.astype(str)
df['_week_of_year'] = df["datetime"].dt.weekofyear.astype(str)
df['_month_of_year'] = df["datetime"].dt.month.astype(str)
df['_year'] = df["datetime"].dt.year.astype(str)
#################### add date information ts ####################
hour_of_day_nunique = df['_hour_of_day'].nunique()
day_of_week_nunique = df['_day_of_week'].nunique()
day_of_month_nunique = df['_day_of_month'].nunique()
day_of_year_nunique = df['_day_of_year'].nunique()
week_of_year_nunique = df['_week_of_year'].nunique()
month_of_year_nunique = df['_month_of_year'].nunique()
year_nunique = df['_year'].nunique()
In [ ]:
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | outflow_lag_0 | outflow_lag_1 | ... | inflow_diff2_per1__autocorrelation__lag_3 | inflow_diff2_per1__skewness | inflow_diff2_per1__count_above_mean | _hour_of_day | _day_of_week | _day_of_month | _day_of_year | _week_of_year | _month_of_year | _year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2311 | 2478 | 2021-04-14 06:00:00 | 50 | 411.0 | 120.0 | 322.0 | 406.0 | 321.0 | 272.0 | 505.0 | ... | -0.090886 | -0.383398 | 15.0 | 6 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2312 | 2479 | 2021-04-14 07:00:00 | 50 | 501.0 | 225.0 | 346.0 | 460.0 | 341.0 | 411.0 | 518.0 | ... | -0.086888 | -0.294891 | 14.0 | 7 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2313 | 2480 | 2021-04-14 08:00:00 | 50 | 474.0 | 314.0 | 347.0 | 527.0 | 371.0 | 501.0 | 482.0 | ... | -0.180060 | -0.306350 | 14.0 | 8 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2314 | 2481 | 2021-04-14 09:00:00 | 50 | 492.0 | 338.0 | 370.0 | 518.0 | 434.0 | 474.0 | 460.0 | ... | -0.164683 | -0.484318 | 15.0 | 9 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2315 | 2482 | 2021-04-14 10:00:00 | 50 | 463.0 | 398.0 | 406.0 | 545.0 | 457.0 | 492.0 | 459.0 | ... | -0.097465 | -0.536210 | 15.0 | 10 | 2 | 14 | 104 | 15 | 4 | 2021 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 247098 | 24786 | 2023-10-30 18:00:00 | 246 | 791.0 | 772.0 | 616.0 | 278.0 | 41.0 | 959.0 | 627.0 | ... | -0.339896 | -0.911517 | 15.0 | 18 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 247099 | 24787 | 2023-10-30 19:00:00 | 246 | 537.0 | 701.0 | 553.0 | 158.0 | 34.0 | 791.0 | 557.0 | ... | -0.339944 | -0.846031 | 15.0 | 19 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 247100 | 24788 | 2023-10-30 20:00:00 | 246 | 358.0 | 560.0 | 426.0 | 97.0 | 41.0 | 537.0 | 409.0 | ... | -0.296306 | -0.805763 | 15.0 | 20 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 247101 | 24789 | 2023-10-30 21:00:00 | 246 | 161.0 | 324.0 | 278.0 | 41.0 | 98.0 | 358.0 | 245.0 | ... | -0.289268 | -0.880379 | 15.0 | 21 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 247102 | 24790 | 2023-10-30 22:00:00 | 246 | 101.0 | 161.0 | 158.0 | 34.0 | 275.0 | 161.0 | 151.0 | ... | -0.313281 | -0.897222 | 15.0 | 22 | 0 | 30 | 303 | 44 | 10 | 2023 |
223130 rows × 163 columns
In [ ]:
print(hour_of_day_nunique)
print(day_of_week_nunique)
print(day_of_month_nunique)
print(day_of_year_nunique)
print(week_of_year_nunique)
print(month_of_year_nunique)
print(year_nunique)
24 7 31 365 52 12 3
In [ ]:
unique_values_group_no_ts_fresh = list(df['group'].unique())
print(len(unique_values_group_no_ts_fresh), "\n")
print(unique_values_group_no_ts_fresh)
10 [50, 74, 120, 125, 141, 151, 232, 238, 239, 246]
In [ ]:
encoder_len = 8 # looks back 8 hours in the past for prediction
pred_len = 1 # 1, 6, 12 # how many timeseteps you want to predict in the future (forecasting), 1 means present (nowcasting)
#cov_lag_len= pred_len
# not using covariates yet, so put as 0
cov_lag_len= 0 #we can use forecasted values, even for inflow
####### train val test split of tsfresh dataframe #########
### you want a the idx for each train, validation and test
### train = 0.7 of each zone (eg. time idx 167 to roughly 20000) eg train = 0.7 * (24812-167)
### validation = 0.2 of each zone (eg. time idx 20000 to 22000)
### test = 0.1 of each zone (eg. time idx 22000 to 24000)
Target = 'target'
total_time_idx = df[df.group == 141].shape[0]
first_idx = df.loc[df['group'] == 141, 'time_idx'].iloc[0]
tr_stop_idx = int(0.7*total_time_idx)
val_start_idx = tr_stop_idx - encoder_len
# val_stop_idx = total_time_idx-1
val_stop_idx = val_start_idx + int(0.2*total_time_idx) # use (tr_stop_idx - encoder_len) as encoder from train set for validatoin
tes_start_idx = val_stop_idx - encoder_len
tes_stop_idx = total_time_idx-1
# tes_start_idx = tr_stop_idx - encoder_len
# tes_stop_idx = total_time_idx-1
print(tr_stop_idx)
print(val_start_idx)
print(val_stop_idx)
print(tes_start_idx)
print(tes_stop_idx)
15619 15611 20073 20065 22312
In [ ]:
train_time_idx = [*range(first_idx, first_idx+tr_stop_idx+1, 1)]
print(train_time_idx[0], train_time_idx[-1])
val_time_idx = [*range(first_idx+val_start_idx, first_idx+val_stop_idx+1, 1)]
print(val_time_idx[0], val_time_idx[-1])
test_time_idx = [*range(first_idx+tes_start_idx, first_idx+tes_stop_idx+1, 1)]
print(test_time_idx[0], test_time_idx[-1])
2478 18097 18089 22551 22543 24790
In [ ]:
"""
set inputs here
(hyperparameters grid search)
"""
######### Network Architecture ###################
# p = 10 # patience no. of epochs
Loss=NegativeBinomialDistributionLoss() # because we assume that the demand follows a negative bionmial distribution
######### Network Architecture ###################
######### Training Routine ###################
fdv_steps = 10 # fast_dev_run -> small set of training to see if training is happening correctly (one of the params in deepar model)
######### Training Routine ###################
# this is for naive forecast, getting past week's demand and predicting purely based on that
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
season_len = 168 # length of season ()
num_past_seas = 6 # number of past seasons to use in averaging (look at 6 week's back, getting average of these 6 values)
#seas_pred_strt_idx = 2035 # seasonal naive forecast start index, in hours use the df dataframe
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
In [ ]:
lr_logger = LearningRateMonitor()
cat_dict = {"_hour_of_day": NaNLabelEncoder(add_nan=True).fit(df._hour_of_day),
"_day_of_week": NaNLabelEncoder(add_nan=True).fit(df._day_of_week), "_day_of_month" : NaNLabelEncoder(add_nan=True).fit(df._day_of_month), "_day_of_year" : NaNLabelEncoder(add_nan=True).fit(df._day_of_year),
"_week_of_year": NaNLabelEncoder(add_nan=True).fit(df._week_of_year), "_month_of_year": NaNLabelEncoder(add_nan=True).fit(df._month_of_year) ,"_year": NaNLabelEncoder(add_nan=True).fit(df._year)}
cat_list = ["_hour_of_day","_day_of_week","_day_of_month","_day_of_year","_week_of_year","_month_of_year","_year"]
embed_size_dict = {'_hour_of_day':(hour_of_day_nunique+1,5),'_day_of_week':(day_of_week_nunique+1,5),'_day_of_month':(day_of_month_nunique+1,5), '_day_of_year':(day_of_year_nunique+1,8), '_week_of_year':(week_of_year_nunique+1,3), '_month_of_year':(month_of_year_nunique+1,3),'_year':(year_nunique+1,1)}
train_dataset = TimeSeriesDataSet(
df[(df['time_idx'].isin(train_time_idx))],
time_idx="time_idx",
target=Target,
categorical_encoders=cat_dict,
group_ids=["group"],
min_encoder_length=encoder_len,
max_encoder_length=encoder_len,
min_prediction_length=pred_len,
max_prediction_length=pred_len,
time_varying_unknown_reals=[Target],
time_varying_known_reals=numeric_cols,
time_varying_known_categoricals=cat_list,
add_relative_time_idx=False,
randomize_length=False,
scalers={},
target_normalizer=TorchNormalizer(method="identity",center=False,transformation=None)
)
val_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[(df['time_idx'].isin(val_time_idx))], stop_randomization=True, predict=False)
# test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx)], stop_randomization=True)
train_dataloader = train_dataset.to_dataloader(train=True, batch_size=bat)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=bat)
# test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
In [ ]:
"""
Machine Learning predictions START
1) DeepAR
"""
trainer = pl.Trainer(
max_epochs=num_ep,
accelerator='gpu',
devices=[0],
auto_lr_find=False,
gradient_clip_val=0.1,
limit_train_batches=1.0,
limit_val_batches=1.0,
#fast_dev_run=fdv_steps,
logger=True,
#log_every_n_steps=10,
# profiler=True,
callbacks=[lr_logger]#, early_stop_callback],
#enable_checkpointing=True,
#default_root_dir="C:\Work\WORK_PACKAGE\Demand_forecasting\github\DeepAR-pytorch\My_model\2_freq_nbinom_LSTM\1_cluster_demand_prediction\logs"
)
#print(f"training routing:\n \n {trainer}")
deepar =DeepAR.from_dataset(
train_dataset,
learning_rate=lr,
hidden_size=neu,
rnn_layers=lay,
dropout=drop,
loss=Loss,
log_interval=20,
log_val_interval=6,
log_gradient_flow=False,
embedding_sizes=embed_size_dict
# reduce_on_plateau_patience=3,
)
#print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")
# print(f"Model :\n \n {deepar}")
torch.set_num_threads(10)
trainer.fit(
deepar,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,
)
########## Prediction #####################
for group in unique_values_group_no_ts_fresh:
test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx) & (df['group'] == group)], stop_randomization=True)
test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
test_output = deepar.predict(data=test_dataloader,mode='prediction',return_index=True,num_workers=8,show_progress_bar=True)
pred_start_idx = int(test_output[1]['time_idx'][0])
actual1_full = np.array([])
pred_full = np.array([])
RMSE_list = np.array([])
WMAPE_list = np.array([])
days = tes_stop_idx - val_stop_idx - pred_len - pred_len
for j in range(0,days,pred_len):
# if j == 0:
# print('pred_start_idx = ',df["datetime"].iloc[pred_start_idx],pred_start_idx )
actual_df = df[df['group'] == group]
prediction_index = [*range(pred_start_idx+(j), pred_start_idx+(j)+pred_len, 1)]
actual = actual_df[actual_df['time_idx'].isin(prediction_index)]['target'].values
actual1_full = np.append(actual1_full, actual)
#plt.plot(actual,'*-')
pred = np.array(np.rint(test_output[0][j])).astype(int)
pred_full = np.append(pred_full, pred)
#plt.plot(pred,'^-')
#plt.show()
absolute_error = np.abs(actual - pred)
absolute_sum = np.abs(actual) + np.abs(pred)
WMAPE = np.mean(absolute_error / (absolute_sum + 1e-8)) * 100
# WMAPE = np.mean(np.abs(actual-pred)/np.abs(actual)+ 1e-8) * 100
RMSE = np.sqrt(mean_squared_error(actual,pred ))
#print('RMSE : ',RMSE)
RMSE_list = np.append(RMSE_list,RMSE)
WMAPE_list = np.append(WMAPE_list,WMAPE)
plt.figure(figsize=(25,5))
plt.plot(actual1_full.flatten(),'^-', label='Actual')
plt.plot(pred_full.flatten(),'*-', label='Predicted')
plt.title('Zone ' + str(group)) # Replace 'XYZ' with your actual group number
plt.legend()
plt.show()
print(f'Average RMSE for {days} days: ',np.mean(RMSE_list))
print('full average RMSE = ',np.sqrt(mean_squared_error(actual1_full.flatten(),pred_full.flatten() )))
print(f'Average WMAPE for {days} days: ',np.mean(WMAPE_list))
wmape_full = np.mean(np.abs(actual1_full.flatten() - pred_full.flatten()) / (np.abs(actual1_full.flatten()) + np.abs(pred_full.flatten()) + 1e-8)) * 100
print('full average WMAPE = ', wmape_full, '\n')
########## Prediction #####################
"""
Machine Learning predictions END
"""
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs `Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used.. `Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used.. LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params ---------------------------------------------------------------------------- 0 | loss | NegativeBinomialDistributionLoss | 0 1 | logging_metrics | ModuleList | 0 2 | embeddings | MultiEmbedding | 3.5 K 3 | rnn | LSTM | 1.0 M 4 | distribution_projector | Linear | 422 ---------------------------------------------------------------------------- 1.0 M Trainable params 0 Non-trainable params 1.0 M Total params 4.179 Total estimated model params size (MB)
Epoch 0: 20%|██ | 1262/6271 [00:21<01:26, 58.09it/s, loss=7.05, v_num=1, train_loss_step=7.100]
Epoch 29: 100%|██████████| 6271/6271 [02:12<00:00, 47.42it/s, loss=5.35, v_num=1, train_loss_step=5.250, val_loss=5.280, train_loss_epoch=5.310]
`Trainer.fit` stopped: `max_epochs=30` reached.
Epoch 29: 100%|██████████| 6271/6271 [02:12<00:00, 47.41it/s, loss=5.35, v_num=1, train_loss_step=5.250, val_loss=5.280, train_loss_epoch=5.310]
Predict: 100%|██████████| 70/70 [00:07<00:00, 9.74 batches/s]
Average RMSE for 2237 days: 73.99329459097005 full average RMSE = 101.49062245416181 Average WMAPE for 2237 days: 9.373556518029122 full average WMAPE = 9.37355651802912
Predict: 100%|██████████| 70/70 [00:12<00:00, 5.51 batches/s]
Average RMSE for 2237 days: 25.555654894948592 full average RMSE = 45.463195015618545 Average WMAPE for 2237 days: 13.593618878411759 full average WMAPE = 13.593618878411759
Predict: 100%|██████████| 70/70 [00:12<00:00, 5.59 batches/s]
Average RMSE for 2237 days: 33.87483236477425 full average RMSE = 66.59161025982159 Average WMAPE for 2237 days: 14.229930988026428 full average WMAPE = 14.229930988026426
Predict: 100%|██████████| 70/70 [00:13<00:00, 5.27 batches/s]
Average RMSE for 2237 days: 25.544926240500672 full average RMSE = 41.83676262806599 Average WMAPE for 2237 days: 14.26932641528523 full average WMAPE = 14.26932641528523
Predict: 100%|██████████| 70/70 [00:07<00:00, 8.90 batches/s]
Average RMSE for 2237 days: 94.72507822977201 full average RMSE = 129.16603654895067 Average WMAPE for 2237 days: 9.402972723272702 full average WMAPE = 9.402972723272702
Predict: 100%|██████████| 70/70 [00:09<00:00, 7.02 batches/s]
Average RMSE for 2237 days: 45.54447921323201 full average RMSE = 68.47043679752328 Average WMAPE for 2237 days: 14.034264040471363 full average WMAPE = 14.034264040471362
Predict: 100%|██████████| 70/70 [00:13<00:00, 5.24 batches/s]
Average RMSE for 2237 days: 24.057666517657577 full average RMSE = 31.563610550001535 Average WMAPE for 2237 days: 12.749907606489481 full average WMAPE = 12.749907606489478
Predict: 100%|██████████| 70/70 [00:07<00:00, 9.39 batches/s]
Average RMSE for 2237 days: 96.0585605721949 full average RMSE = 137.632970613551 Average WMAPE for 2237 days: 10.072097303880529 full average WMAPE = 10.07209730388053
Predict: 100%|██████████| 70/70 [00:07<00:00, 8.77 batches/s]
Average RMSE for 2237 days: 64.90567724631202 full average RMSE = 95.46294981647092 Average WMAPE for 2237 days: 11.085441602762529 full average WMAPE = 11.085441602762529
Predict: 100%|██████████| 70/70 [00:07<00:00, 9.85 batches/s]
Average RMSE for 2237 days: 86.37058560572194 full average RMSE = 120.05994442279882 Average WMAPE for 2237 days: 9.477256027608103 full average WMAPE = 9.477256027608103
Out[ ]:
'\nMachine Learning predictions END\n'
Experiment 4¶
In [ ]:
### Best hyperparams
neu = 810
lay = 1
bat = 384
lr = 0.0053935887501248515
drop = 0.4
num_ep = standardised_num_ep
dataset_size = reduced_dataset_size
In [ ]:
### Your code
os.chdir('/home/optimusprime/Desktop/Data_Timothy')
############ Basically df = raw_feat_df <-------------------------
df = pd.read_parquet('EXP_4_combined_dd_ss_all_covariates_encoded_further_preprocessed.parquet', engine='pyarrow')
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | outflow_lag_0 | outflow_lag_1 | ... | wind_speed | rain_1h | temp | clouds_all | snow_3h | rain_3h | humidity | snow_1h | weather_description | weather_main | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 167 | 2021-01-07 23:00:00 | 4 | 0.0 | 10.0 | 8.0 | 1.0 | 1.0 | 5.0 | 3.0 | ... | 7.70 | 0.0 | 276.41 | 0 | 0.0 | 0.0 | 53 | 0.0 | 16 | 0 |
| 1 | 168 | 2021-01-08 00:00:00 | 4 | 1.0 | 4.0 | 13.0 | 0.0 | 2.0 | 0.0 | 1.0 | ... | 8.23 | 0.0 | 275.35 | 0 | 0.0 | 0.0 | 56 | 0.0 | 16 | 0 |
| 2 | 169 | 2021-01-08 01:00:00 | 4 | 2.0 | 1.0 | 9.0 | 0.0 | 0.0 | 1.0 | 2.0 | ... | 6.70 | 0.0 | 274.53 | 0 | 0.0 | 0.0 | 61 | 0.0 | 16 | 0 |
| 3 | 170 | 2021-01-08 02:00:00 | 4 | 0.0 | 1.0 | 5.0 | 0.0 | 1.0 | 2.0 | 3.0 | ... | 6.20 | 0.0 | 274.13 | 0 | 0.0 | 0.0 | 63 | 0.0 | 16 | 0 |
| 4 | 171 | 2021-01-08 03:00:00 | 4 | 0.0 | 3.0 | 2.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | 4.63 | 0.0 | 273.48 | 0 | 0.0 | 0.0 | 64 | 0.0 | 16 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1631191 | 24808 | 2023-10-31 16:00:00 | 263 | 126.0 | 176.0 | 136.0 | 23.0 | 226.0 | 142.0 | 113.0 | ... | 4.63 | 0.0 | 282.29 | 20 | 0.0 | 0.0 | 50 | 0.0 | 2 | 1 |
| 1631192 | 24809 | 2023-10-31 17:00:00 | 263 | 112.0 | 153.0 | 144.0 | 9.0 | 185.0 | 126.0 | 82.0 | ... | 4.12 | 0.0 | 283.77 | 20 | 0.0 | 0.0 | 47 | 0.0 | 2 | 1 |
| 1631193 | 24810 | 2023-10-31 18:00:00 | 263 | 99.0 | 135.0 | 94.0 | 7.0 | 136.0 | 112.0 | 77.0 | ... | 3.60 | 0.0 | 283.52 | 75 | 0.0 | 0.0 | 50 | 0.0 | 0 | 1 |
| 1631194 | 24811 | 2023-10-31 19:00:00 | 263 | 60.0 | 119.0 | 62.0 | 4.0 | 85.0 | 99.0 | 30.0 | ... | 4.12 | 0.0 | 283.69 | 100 | 0.0 | 0.0 | 48 | 0.0 | 14 | 1 |
| 1631195 | 24812 | 2023-10-31 20:00:00 | 263 | 50.0 | 119.0 | 23.0 | 4.0 | 42.0 | 60.0 | 10.0 | ... | 3.60 | 0.0 | 283.62 | 100 | 0.0 | 0.0 | 49 | 0.0 | 14 | 1 |
1631196 rows × 171 columns
In [ ]:
working_directory = '/home/optimusprime/Desktop/Training_Prediction_4_5_6'
os.chdir(working_directory)
In [ ]:
first_idx = df['time_idx'][0]
total_time_idx = df[df.group == 13].shape[0]
reduced_df_time_idx = [*range(int((1-dataset_size)*total_time_idx), total_time_idx+1, 1)]
df = df[df['time_idx'].isin(reduced_df_time_idx)]
In [ ]:
numeric_cols = list(df.columns)
numeric_cols = [e for e in numeric_cols if e not in ('time_idx', 'datetime', 'group', 'target')]
print(numeric_cols)
['inflow_lag_0', 'inflow_lag_1', 'inflow_lag_2', 'inflow_lag_3', 'outflow_lag_0', 'outflow_lag_1', 'outflow_lag_2', 'outflow_lag_3', 'inflow', 'inflow_diff1_per1', 'inflow_diff1_per2', 'inflow_diff2_per1', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_0__skewness', 'inflow_lag_0__kurtosis', 'inflow_lag_0__count_above_mean', 'inflow_lag_0__count_below_mean', 'inflow_lag_0__autocorrelation__lag_1', 'inflow_lag_0__autocorrelation__lag_2', 'inflow_lag_0__autocorrelation__lag_3', 'inflow_lag_0__partial_autocorrelation__lag_1', 'inflow_lag_0__partial_autocorrelation__lag_2', 'inflow_lag_0__partial_autocorrelation__lag_3', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_1__skewness', 'inflow_lag_1__kurtosis', 'inflow_lag_1__count_above_mean', 'inflow_lag_1__count_below_mean', 'inflow_lag_1__autocorrelation__lag_1', 'inflow_lag_1__autocorrelation__lag_2', 'inflow_lag_1__autocorrelation__lag_3', 'inflow_lag_1__partial_autocorrelation__lag_1', 'inflow_lag_1__partial_autocorrelation__lag_2', 'inflow_lag_1__partial_autocorrelation__lag_3', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_2__skewness', 'inflow_lag_2__kurtosis', 'inflow_lag_2__count_above_mean', 'inflow_lag_2__count_below_mean', 'inflow_lag_2__autocorrelation__lag_1', 'inflow_lag_2__autocorrelation__lag_2', 'inflow_lag_2__autocorrelation__lag_3', 'inflow_lag_2__partial_autocorrelation__lag_1', 'inflow_lag_2__partial_autocorrelation__lag_2', 'inflow_lag_2__partial_autocorrelation__lag_3', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'inflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'inflow_lag_3__skewness', 'inflow_lag_3__kurtosis', 'inflow_lag_3__count_above_mean', 'inflow_lag_3__count_below_mean', 'inflow_lag_3__autocorrelation__lag_1', 'inflow_lag_3__autocorrelation__lag_2', 'inflow_lag_3__autocorrelation__lag_3', 'inflow_lag_3__partial_autocorrelation__lag_1', 'inflow_lag_3__partial_autocorrelation__lag_2', 'inflow_lag_3__partial_autocorrelation__lag_3', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_0__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_0__skewness', 'outflow_lag_0__kurtosis', 'outflow_lag_0__count_above_mean', 'outflow_lag_0__count_below_mean', 'outflow_lag_0__autocorrelation__lag_1', 'outflow_lag_0__autocorrelation__lag_2', 'outflow_lag_0__autocorrelation__lag_3', 'outflow_lag_0__partial_autocorrelation__lag_1', 'outflow_lag_0__partial_autocorrelation__lag_2', 'outflow_lag_0__partial_autocorrelation__lag_3', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_1__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_1__skewness', 'outflow_lag_1__kurtosis', 'outflow_lag_1__count_above_mean', 'outflow_lag_1__count_below_mean', 'outflow_lag_1__autocorrelation__lag_1', 'outflow_lag_1__autocorrelation__lag_2', 'outflow_lag_1__autocorrelation__lag_3', 'outflow_lag_1__partial_autocorrelation__lag_1', 'outflow_lag_1__partial_autocorrelation__lag_2', 'outflow_lag_1__partial_autocorrelation__lag_3', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_2__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_2__skewness', 'outflow_lag_2__kurtosis', 'outflow_lag_2__count_above_mean', 'outflow_lag_2__count_below_mean', 'outflow_lag_2__autocorrelation__lag_1', 'outflow_lag_2__autocorrelation__lag_2', 'outflow_lag_2__autocorrelation__lag_3', 'outflow_lag_2__partial_autocorrelation__lag_1', 'outflow_lag_2__partial_autocorrelation__lag_2', 'outflow_lag_2__partial_autocorrelation__lag_3', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_0', 'outflow_lag_3__fft_coefficient__attr_"real"__coeff_1', 'outflow_lag_3__skewness', 'outflow_lag_3__kurtosis', 'outflow_lag_3__count_above_mean', 'outflow_lag_3__count_below_mean', 'outflow_lag_3__autocorrelation__lag_1', 'outflow_lag_3__autocorrelation__lag_2', 'outflow_lag_3__autocorrelation__lag_3', 'outflow_lag_3__partial_autocorrelation__lag_1', 'outflow_lag_3__partial_autocorrelation__lag_2', 'outflow_lag_3__partial_autocorrelation__lag_3', 'inflow__fft_coefficient__attr_"real"__coeff_0', 'inflow__fft_coefficient__attr_"real"__coeff_1', 'inflow__skewness', 'inflow__kurtosis', 'inflow__count_above_mean', 'inflow__count_below_mean', 'inflow__autocorrelation__lag_1', 'inflow__autocorrelation__lag_2', 'inflow__autocorrelation__lag_3', 'inflow__partial_autocorrelation__lag_1', 'inflow__partial_autocorrelation__lag_2', 'inflow__partial_autocorrelation__lag_3', 'inflow_diff1_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per1__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff1_per1__kurtosis', 'inflow_diff1_per1__count_above_mean', 'inflow_diff1_per1__count_below_mean', 'inflow_diff1_per1__autocorrelation__lag_1', 'inflow_diff1_per1__autocorrelation__lag_2', 'inflow_diff1_per1__autocorrelation__lag_3', 'inflow_diff1_per1__partial_autocorrelation__lag_1', 'inflow_diff1_per1__partial_autocorrelation__lag_2', 'inflow_diff1_per1__partial_autocorrelation__lag_3', 'inflow_diff1_per1__skewness', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff1_per2__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff1_per2__skewness', 'inflow_diff1_per2__kurtosis', 'inflow_diff1_per2__count_above_mean', 'inflow_diff1_per2__count_below_mean', 'inflow_diff1_per2__autocorrelation__lag_1', 'inflow_diff1_per2__autocorrelation__lag_2', 'inflow_diff1_per2__autocorrelation__lag_3', 'inflow_diff1_per2__partial_autocorrelation__lag_1', 'inflow_diff1_per2__partial_autocorrelation__lag_2', 'inflow_diff1_per2__partial_autocorrelation__lag_3', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_1', 'inflow_diff2_per1__count_above_mean', 'inflow_diff2_per1__autocorrelation__lag_1', 'inflow_diff2_per1__autocorrelation__lag_2', 'inflow_diff2_per1__partial_autocorrelation__lag_1', 'inflow_diff2_per1__partial_autocorrelation__lag_2', 'inflow_diff2_per1__partial_autocorrelation__lag_3', 'inflow_diff2_per1__fft_coefficient__attr_"real"__coeff_0', 'inflow_diff2_per1__autocorrelation__lag_3', 'inflow_diff2_per1__count_below_mean', 'inflow_diff2_per1__kurtosis', 'ave_total_fair', 'wind_gust', 'wind_speed', 'rain_1h', 'temp', 'clouds_all', 'snow_3h', 'rain_3h', 'humidity', 'snow_1h', 'weather_description', 'weather_main']
In [ ]:
#################### add date information ts ####################
# create datetime features from datetime column in raw_feat_df
df['_hour_of_day'] = df["datetime"].dt.hour.astype(str)
df['_day_of_week'] = df["datetime"].dt.dayofweek.astype(str)
df['_day_of_month'] = df["datetime"].dt.day.astype(str)
df['_day_of_year'] = df["datetime"].dt.dayofyear.astype(str)
df['_week_of_year'] = df["datetime"].dt.weekofyear.astype(str)
df['_month_of_year'] = df["datetime"].dt.month.astype(str)
df['_year'] = df["datetime"].dt.year.astype(str)
#################### add date information ts ####################
hour_of_day_nunique = df['_hour_of_day'].nunique()
day_of_week_nunique = df['_day_of_week'].nunique()
day_of_month_nunique = df['_day_of_month'].nunique()
day_of_year_nunique = df['_day_of_year'].nunique()
week_of_year_nunique = df['_week_of_year'].nunique()
month_of_year_nunique = df['_month_of_year'].nunique()
year_nunique = df['_year'].nunique()
In [ ]:
df
Out[ ]:
| time_idx | datetime | group | target | inflow_lag_0 | inflow_lag_1 | inflow_lag_2 | inflow_lag_3 | outflow_lag_0 | outflow_lag_1 | ... | snow_1h | weather_description | weather_main | _hour_of_day | _day_of_week | _day_of_month | _day_of_year | _week_of_year | _month_of_year | _year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2311 | 2478 | 2021-04-14 06:00:00 | 4 | 5.0 | 3.0 | 17.0 | 8.0 | 6.0 | 4.0 | 2.0 | ... | 0.0 | 16 | 0 | 6 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2312 | 2479 | 2021-04-14 07:00:00 | 4 | 4.0 | 7.0 | 15.0 | 9.0 | 7.0 | 5.0 | 7.0 | ... | 0.0 | 16 | 0 | 7 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2313 | 2480 | 2021-04-14 08:00:00 | 4 | 1.0 | 13.0 | 12.0 | 11.0 | 11.0 | 4.0 | 3.0 | ... | 0.0 | 16 | 0 | 8 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2314 | 2481 | 2021-04-14 09:00:00 | 4 | 6.0 | 15.0 | 12.0 | 19.0 | 14.0 | 1.0 | 3.0 | ... | 0.0 | 16 | 0 | 9 | 2 | 14 | 104 | 15 | 4 | 2021 |
| 2315 | 2482 | 2021-04-14 10:00:00 | 4 | 6.0 | 12.0 | 13.0 | 14.0 | 18.0 | 6.0 | 1.0 | ... | 0.0 | 16 | 0 | 10 | 2 | 14 | 104 | 15 | 4 | 2021 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1631169 | 24786 | 2023-10-30 18:00:00 | 263 | 82.0 | 136.0 | 78.0 | 5.0 | 90.0 | 113.0 | 43.0 | ... | 0.0 | 12 | 5 | 18 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 1631170 | 24787 | 2023-10-30 19:00:00 | 263 | 77.0 | 144.0 | 46.0 | 3.0 | 57.0 | 82.0 | 18.0 | ... | 0.0 | 12 | 5 | 19 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 1631171 | 24788 | 2023-10-30 20:00:00 | 263 | 30.0 | 94.0 | 20.0 | 6.0 | 24.0 | 77.0 | 8.0 | ... | 0.0 | 14 | 1 | 20 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 1631172 | 24789 | 2023-10-30 21:00:00 | 263 | 10.0 | 62.0 | 13.0 | 5.0 | 13.0 | 30.0 | 5.0 | ... | 0.0 | 9 | 6 | 21 | 0 | 30 | 303 | 44 | 10 | 2023 |
| 1631173 | 24790 | 2023-10-30 22:00:00 | 263 | 11.0 | 23.0 | 5.0 | 15.0 | 3.0 | 10.0 | 7.0 | ... | 0.0 | 14 | 1 | 22 | 0 | 30 | 303 | 44 | 10 | 2023 |
1472658 rows × 178 columns
In [ ]:
print(hour_of_day_nunique)
print(day_of_week_nunique)
print(day_of_month_nunique)
print(day_of_year_nunique)
print(week_of_year_nunique)
print(month_of_year_nunique)
print(year_nunique)
24 7 31 365 52 12 3
In [ ]:
unique_values_group_no_ts_fresh = list(df['group'].unique())
print(len(unique_values_group_no_ts_fresh), "\n")
print(unique_values_group_no_ts_fresh)
66 [4, 12, 13, 24, 41, 42, 43, 45, 48, 50, 68, 74, 75, 79, 87, 88, 90, 100, 107, 113, 114, 116, 120, 125, 127, 128, 137, 140, 141, 142, 143, 144, 148, 151, 152, 153, 158, 161, 162, 163, 164, 166, 170, 186, 194, 202, 209, 211, 224, 229, 230, 231, 232, 233, 234, 236, 237, 238, 239, 243, 244, 246, 249, 261, 262, 263]
In [ ]:
encoder_len = 8 # looks back 8 hours in the past for prediction
pred_len = 1 # 1, 6, 12 # how many timeseteps you want to predict in the future (forecasting), 1 means present (nowcasting)
#cov_lag_len= pred_len
# not using covariates yet, so put as 0
cov_lag_len= 0 #we can use forecasted values, even for inflow
####### train val test split of tsfresh dataframe #########
### you want a the idx for each train, validation and test
### train = 0.7 of each zone (eg. time idx 167 to roughly 20000) eg train = 0.7 * (24812-167)
### validation = 0.2 of each zone (eg. time idx 20000 to 22000)
### test = 0.1 of each zone (eg. time idx 22000 to 24000)
Target = 'target'
total_time_idx = df[df.group == 13].shape[0]
first_idx = df.loc[df['group'] == 13, 'time_idx'].iloc[0]
tr_stop_idx = int(0.7*total_time_idx)
val_start_idx = tr_stop_idx - encoder_len
# val_stop_idx = total_time_idx-1
val_stop_idx = val_start_idx + int(0.2*total_time_idx) # use (tr_stop_idx - encoder_len) as encoder from train set for validatoin
tes_start_idx = val_stop_idx - encoder_len
tes_stop_idx = total_time_idx-1
# tes_start_idx = tr_stop_idx - encoder_len
# tes_stop_idx = total_time_idx-1
print(tr_stop_idx)
print(val_start_idx)
print(val_stop_idx)
print(tes_start_idx)
print(tes_stop_idx)
15619 15611 20073 20065 22312
In [ ]:
train_time_idx = [*range(first_idx, first_idx+tr_stop_idx+1, 1)]
print(train_time_idx[0], train_time_idx[-1])
val_time_idx = [*range(first_idx+val_start_idx, first_idx+val_stop_idx+1, 1)]
print(val_time_idx[0], val_time_idx[-1])
test_time_idx = [*range(first_idx+tes_start_idx, first_idx+tes_stop_idx+1, 1)]
print(test_time_idx[0], test_time_idx[-1])
2478 18097 18089 22551 22543 24790
In [ ]:
"""
set inputs here
(hyperparameters grid search)
"""
######### Network Architecture ###################
# p = 10 # patience no. of epochs
Loss=NegativeBinomialDistributionLoss() # because we assume that the demand follows a negative bionmial distribution
######### Network Architecture ###################
######### Training Routine ###################
fdv_steps = 10 # fast_dev_run -> small set of training to see if training is happening correctly (one of the params in deepar model)
######### Training Routine ###################
# this is for naive forecast, getting past week's demand and predicting purely based on that
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
season_len = 168 # length of season ()
num_past_seas = 6 # number of past seasons to use in averaging (look at 6 week's back, getting average of these 6 values)
#seas_pred_strt_idx = 2035 # seasonal naive forecast start index, in hours use the df dataframe
############## Inputs for 2) Persistance model ( seasonal naive forecast ) #######################
In [ ]:
lr_logger = LearningRateMonitor()
cat_dict = {"_hour_of_day": NaNLabelEncoder(add_nan=True).fit(df._hour_of_day),
"_day_of_week": NaNLabelEncoder(add_nan=True).fit(df._day_of_week), "_day_of_month" : NaNLabelEncoder(add_nan=True).fit(df._day_of_month), "_day_of_year" : NaNLabelEncoder(add_nan=True).fit(df._day_of_year),
"_week_of_year": NaNLabelEncoder(add_nan=True).fit(df._week_of_year), "_month_of_year": NaNLabelEncoder(add_nan=True).fit(df._month_of_year) ,"_year": NaNLabelEncoder(add_nan=True).fit(df._year)}
cat_list = ["_hour_of_day","_day_of_week","_day_of_month","_day_of_year","_week_of_year","_month_of_year","_year"]
embed_size_dict = {'_hour_of_day':(hour_of_day_nunique+1,5),'_day_of_week':(day_of_week_nunique+1,5),'_day_of_month':(day_of_month_nunique+1,5), '_day_of_year':(day_of_year_nunique+1,8), '_week_of_year':(week_of_year_nunique+1,3), '_month_of_year':(month_of_year_nunique+1,3),'_year':(year_nunique+1,1)}
train_dataset = TimeSeriesDataSet(
df[(df['time_idx'].isin(train_time_idx))],
time_idx="time_idx",
target=Target,
categorical_encoders=cat_dict,
group_ids=["group"],
min_encoder_length=encoder_len,
max_encoder_length=encoder_len,
min_prediction_length=pred_len,
max_prediction_length=pred_len,
time_varying_unknown_reals=[Target],
time_varying_known_reals=numeric_cols,
time_varying_known_categoricals=cat_list,
add_relative_time_idx=False,
randomize_length=False,
scalers={},
target_normalizer=TorchNormalizer(method="identity",center=False,transformation=None)
)
val_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[(df['time_idx'].isin(val_time_idx))], stop_randomization=True, predict=False)
# test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx)], stop_randomization=True)
train_dataloader = train_dataset.to_dataloader(train=True, batch_size=bat)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=bat)
# test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
In [ ]:
"""
Machine Learning predictions START
1) DeepAR
"""
trainer = pl.Trainer(
max_epochs=num_ep,
accelerator='gpu',
devices=[0],
auto_lr_find=False,
gradient_clip_val=0.1,
limit_train_batches=1.0,
limit_val_batches=1.0,
#fast_dev_run=fdv_steps,
logger=True,
#log_every_n_steps=10,
# profiler=True,
callbacks=[lr_logger]#, early_stop_callback],
#enable_checkpointing=True,
#default_root_dir="C:\Work\WORK_PACKAGE\Demand_forecasting\github\DeepAR-pytorch\My_model\2_freq_nbinom_LSTM\1_cluster_demand_prediction\logs"
)
#print(f"training routing:\n \n {trainer}")
deepar =DeepAR.from_dataset(
train_dataset,
learning_rate=lr,
hidden_size=neu,
rnn_layers=lay,
dropout=drop,
loss=Loss,
log_interval=20,
log_val_interval=6,
log_gradient_flow=False,
embedding_sizes=embed_size_dict
# reduce_on_plateau_patience=3,
)
#print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")
# print(f"Model :\n \n {deepar}")
torch.set_num_threads(10)
trainer.fit(
deepar,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,
)
########## Prediction #####################
for group in unique_values_group_no_ts_fresh:
test_dataset = TimeSeriesDataSet.from_dataset(train_dataset,df[df['time_idx'].isin(test_time_idx) & (df['group'] == group)], stop_randomization=True)
test_dataloader = test_dataset.to_dataloader(train=False, batch_size=bat)
test_output = deepar.predict(data=test_dataloader,mode='prediction',return_index=True,num_workers=8,show_progress_bar=True)
pred_start_idx = int(test_output[1]['time_idx'][0])
actual1_full = np.array([])
pred_full = np.array([])
RMSE_list = np.array([])
WMAPE_list = np.array([])
days = tes_stop_idx - val_stop_idx - pred_len - pred_len
for j in range(0,days,pred_len):
if j == 0:
print('pred_start_idx = ',df["datetime"].iloc[pred_start_idx],pred_start_idx )
actual_df = df[df['group'] == group]
prediction_index = [*range(pred_start_idx+(j), pred_start_idx+(j)+pred_len, 1)]
actual = actual_df[actual_df['time_idx'].isin(prediction_index)]['target'].values
actual1_full = np.append(actual1_full, actual)
#plt.plot(actual,'*-')
pred = np.array(np.rint(test_output[0][j])).astype(int)
pred_full = np.append(pred_full, pred)
#plt.plot(pred,'^-')
#plt.show()
absolute_error = np.abs(actual - pred)
absolute_sum = np.abs(actual) + np.abs(pred)
WMAPE = np.mean(absolute_error / (absolute_sum + 1e-8)) * 100
# WMAPE = np.mean(np.abs(actual-pred)/np.abs(actual)+ 1e-8) * 100
RMSE = np.sqrt(mean_squared_error(actual,pred ))
#print('RMSE : ',RMSE)
RMSE_list = np.append(RMSE_list,RMSE)
WMAPE_list = np.append(WMAPE_list,WMAPE)
plt.figure(figsize=(25,5))
plt.plot(actual1_full.flatten(),'^-', label='Actual')
plt.plot(pred_full.flatten(),'*-', label='Predicted')
plt.title('Zone ' + str(group)) # Replace 'XYZ' with your actual group number
plt.legend()
plt.show()
print(f'Average RMSE for {days} days: ',np.mean(RMSE_list))
print('full average RMSE = ',np.sqrt(mean_squared_error(actual1_full.flatten(),pred_full.flatten() )))
print(f'Average WMAPE for {days} days: ',np.mean(WMAPE_list))
wmape_full = np.mean(np.abs(actual1_full.flatten() - pred_full.flatten()) / (np.abs(actual1_full.flatten()) + np.abs(pred_full.flatten()) + 1e-8)) * 100
print('full average WMAPE = ', wmape_full, '\n')
########## Prediction #####################
"""
Machine Learning predictions END
"""
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs HPU available: False, using: 0 HPUs `Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used.. `Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used.. LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params ---------------------------------------------------------------------------- 0 | loss | NegativeBinomialDistributionLoss | 0 1 | logging_metrics | ModuleList | 0 2 | embeddings | MultiEmbedding | 3.5 K 3 | rnn | LSTM | 3.3 M 4 | distribution_projector | Linear | 1.6 K ---------------------------------------------------------------------------- 3.3 M Trainable params 0 Non-trainable params 3.3 M Total params 13.110 Total estimated model params size (MB)
Epoch 29: 100%|██████████| 3449/3449 [08:41<00:00, 6.61it/s, loss=4.21, v_num=2, train_loss_step=-8.37, val_loss=8.730, train_loss_epoch=3.960]
`Trainer.fit` stopped: `max_epochs=30` reached.
Epoch 29: 100%|██████████| 3449/3449 [08:41<00:00, 6.61it/s, loss=4.21, v_num=2, train_loss_step=-8.37, val_loss=8.730, train_loss_epoch=3.960]
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.17 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 2.1363433169423334 full average RMSE = 3.5765465595743136 Average WMAPE for 2237 days: 22.548920872924786 full average WMAPE = 22.54892087292479
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.9611086276262852 full average RMSE = 1.9106084569856876 Average WMAPE for 2237 days: 12.180373590499023 full average WMAPE = 12.180373590499023
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.24 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 5.1032632990612425 full average RMSE = 7.71161018236969 Average WMAPE for 2237 days: 13.258666237103316 full average WMAPE = 13.258666237103316
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 3.2641931157800625 full average RMSE = 4.64540572689366 Average WMAPE for 2237 days: 17.984809207420852 full average WMAPE = 17.984809207420852
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.26 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 3.2847563701385782 full average RMSE = 4.656651022819259 Average WMAPE for 2237 days: 18.23758345741423 full average WMAPE = 18.23758345741423
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.33 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 1.796155565489495 full average RMSE = 2.5524449873602024 Average WMAPE for 2237 days: 21.732299548936492 full average WMAPE = 21.732299548936492
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.16s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 11.488600804649083 full average RMSE = 17.61042179348071 Average WMAPE for 2237 days: 12.739401113262808 full average WMAPE = 12.73940111326281
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.29 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 2.253017434063478 full average RMSE = 3.279417940657122 Average WMAPE for 2237 days: 17.328143723997197 full average WMAPE = 17.3281437239972
Predict: 100%|██████████| 6/6 [00:08<00:00, 1.40s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 15.07107733571748 full average RMSE = 20.776478223820572 Average WMAPE for 2237 days: 8.717234676856515 full average WMAPE = 8.717234676856515
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.33 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 6.953956191327671 full average RMSE = 9.929613710056111 Average WMAPE for 2237 days: 15.384665839960995 full average WMAPE = 15.384665839960995
Predict: 100%|██████████| 6/6 [00:09<00:00, 1.61s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 13.871703173893607 full average RMSE = 18.941112895266222 Average WMAPE for 2237 days: 9.550044466412075 full average WMAPE = 9.550044466412075
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.31 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 3.3191774698256595 full average RMSE = 4.9065017578035635 Average WMAPE for 2237 days: 19.75693645787222 full average WMAPE = 19.756936457872225
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.27 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 4.8940545373267765 full average RMSE = 6.8247098968802895 Average WMAPE for 2237 days: 14.76068919835685 full average WMAPE = 14.760689198356847
Predict: 100%|██████████| 6/6 [00:07<00:00, 1.23s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 17.69244523915959 full average RMSE = 35.37903831730715 Average WMAPE for 2237 days: 11.269069446917326 full average WMAPE = 11.269069446917324
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.26 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 5.1636119803308 full average RMSE = 7.428605201159186 Average WMAPE for 2237 days: 14.21807505760722 full average WMAPE = 14.21807505760722
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.31 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 3.5243629861421546 full average RMSE = 5.134489861919553 Average WMAPE for 2237 days: 15.077829143728435 full average WMAPE = 15.077829143728433
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.04s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.398748323647743 full average RMSE = 13.038833371192247 Average WMAPE for 2237 days: 9.751149446820133 full average WMAPE = 9.751149446820133
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.03 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 10.63969602145731 full average RMSE = 14.307982835492293 Average WMAPE for 2237 days: 10.735795656430644 full average WMAPE = 10.735795656430644
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.15s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 11.487706750111757 full average RMSE = 15.731287356014908 Average WMAPE for 2237 days: 9.799833775187503 full average WMAPE = 9.799833775187503
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.04 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 8.81001341081806 full average RMSE = 12.146880848579002 Average WMAPE for 2237 days: 12.000445098429827 full average WMAPE = 12.000445098429825
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.06s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.967367009387573 full average RMSE = 15.597430527820384 Average WMAPE for 2237 days: 13.725919343893828 full average WMAPE = 13.725919343893828
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 1.5055878408582923 full average RMSE = 2.2706854728833585 Average WMAPE for 2237 days: 21.953542972992153 full average WMAPE = 21.953542972992153
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.009387572641931158 full average RMSE = 0.11771934984769938 Average WMAPE for 2237 days: 0.6630904430474843 full average WMAPE = 0.6630904430474843
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.30 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 4.980330800178811 full average RMSE = 6.889607860283818 Average WMAPE for 2237 days: 14.760096277892302 full average WMAPE = 14.760096277892304
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.33 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.1318730442556996 full average RMSE = 0.4061441396066061 Average WMAPE for 2237 days: 4.324243764062981 full average WMAPE = 4.324243764062981
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.29 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.010728654447921324 full average RMSE = 0.11580508650271651 Average WMAPE for 2237 days: 0.4991804470210599 full average WMAPE = 0.49918044702105996
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.14 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 7.065265981224854 full average RMSE = 9.592665019302641 Average WMAPE for 2237 days: 11.15555764519327 full average WMAPE = 11.155557645193268
Predict: 100%|██████████| 6/6 [00:07<00:00, 1.25s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.844881537773805 full average RMSE = 14.054288381285485 Average WMAPE for 2237 days: 10.55663900846162 full average WMAPE = 10.55663900846162
Predict: 100%|██████████| 6/6 [00:07<00:00, 1.27s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 11.144389807778275 full average RMSE = 15.416001648920423 Average WMAPE for 2237 days: 9.229490373835649 full average WMAPE = 9.22949037383565
Predict: 100%|██████████| 6/6 [00:10<00:00, 1.71s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 18.529280286097453 full average RMSE = 32.917243600284955 Average WMAPE for 2237 days: 11.541182815495675 full average WMAPE = 11.541182815495677
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.18 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 7.285203397407241 full average RMSE = 10.415871970878085 Average WMAPE for 2237 days: 12.80739203055414 full average WMAPE = 12.80739203055414
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.18 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 6.8511399195350915 full average RMSE = 9.874039056813833 Average WMAPE for 2237 days: 14.665103972290959 full average WMAPE = 14.66510397229096
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.05 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.796155565489494 full average RMSE = 18.54456146546358 Average WMAPE for 2237 days: 14.838820369014025 full average WMAPE = 14.83882036901402
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.24 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 6.053196244970943 full average RMSE = 8.772162048232296 Average WMAPE for 2237 days: 13.662684293454047 full average WMAPE = 13.662684293454047
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.35 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 1.3178363880196693 full average RMSE = 2.075034132674872 Average WMAPE for 2237 days: 19.728038565533815 full average WMAPE = 19.72803856553381
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.29 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.0232454179704962 full average RMSE = 0.174349804327709 Average WMAPE for 2237 days: 1.4230367942619082 full average WMAPE = 1.4230367942619084
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.16 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 7.302190433616451 full average RMSE = 10.81826548606364 Average WMAPE for 2237 days: 13.164128484334665 full average WMAPE = 13.164128484334665
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.93s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 31.248547161376845 full average RMSE = 53.777597109084375 Average WMAPE for 2237 days: 11.638168770667733 full average WMAPE = 11.638168770667736
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.95s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 18.443004023245418 full average RMSE = 26.668774130700715 Average WMAPE for 2237 days: 9.616359328612418 full average WMAPE = 9.616359328612418
Predict: 100%|██████████| 6/6 [00:09<00:00, 1.65s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 14.789003129190881 full average RMSE = 21.85088858928306 Average WMAPE for 2237 days: 9.873889755719473 full average WMAPE = 9.873889755719475
Predict: 100%|██████████| 6/6 [00:08<00:00, 1.41s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 12.517657577112203 full average RMSE = 18.48760896635373 Average WMAPE for 2237 days: 8.871166686373291 full average WMAPE = 8.871166686373293
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 4.485024586499777 full average RMSE = 6.639615060662582 Average WMAPE for 2237 days: 15.944465058680887 full average WMAPE = 15.94446505868089
Predict: 100%|██████████| 6/6 [00:10<00:00, 1.70s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 14.071524362986143 full average RMSE = 19.323447392553096 Average WMAPE for 2237 days: 9.6320804499991 full average WMAPE = 9.632080449999098
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.94s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 24.776933392936968 full average RMSE = 34.75623008508422 Average WMAPE for 2237 days: 12.668159605645114 full average WMAPE = 12.668159605645112
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.25 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.11041573535985695 full average RMSE = 0.7986913749269209 Average WMAPE for 2237 days: 2.8500011246110657 full average WMAPE = 2.8500011246110657
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.0889584264640143 full average RMSE = 0.3227341841184003 Average WMAPE for 2237 days: 3.0502160515002728 full average WMAPE = 3.0502160515002728
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 3.0044702726866337 full average RMSE = 4.418688721036006 Average WMAPE for 2237 days: 17.235670222173077 full average WMAPE = 17.235670222173077
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.24 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 6.067054090299508 full average RMSE = 8.758672820429137 Average WMAPE for 2237 days: 13.56165237227415 full average WMAPE = 13.56165237227415
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.28 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 2.3366115333035316 full average RMSE = 3.2445408226364076 Average WMAPE for 2237 days: 20.796420031843454 full average WMAPE = 20.796420031843454
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.13s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 10.198033080017881 full average RMSE = 14.402276554759236 Average WMAPE for 2237 days: 9.808259285627635 full average WMAPE = 9.808259285627637
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.87s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 19.308001788109074 full average RMSE = 27.53279022051301 Average WMAPE for 2237 days: 9.170513136200752 full average WMAPE = 9.170513136200752
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.04 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 8.670987930263745 full average RMSE = 12.017365619871226 Average WMAPE for 2237 days: 11.540963349723782 full average WMAPE = 11.54096334972378
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.24 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 2.331247206079571 full average RMSE = 3.4294081721581477 Average WMAPE for 2237 days: 21.147475545107184 full average WMAPE = 21.147475545107184
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.13 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 8.305766651765758 full average RMSE = 11.506620900238135 Average WMAPE for 2237 days: 12.145337044721805 full average WMAPE = 12.145337044721805
Predict: 100%|██████████| 6/6 [00:09<00:00, 1.63s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 12.096557890031292 full average RMSE = 17.20782015984459 Average WMAPE for 2237 days: 9.707331873567282 full average WMAPE = 9.707331873567284
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.88s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 18.843093428699152 full average RMSE = 29.583458993900777 Average WMAPE for 2237 days: 11.428323194822644 full average WMAPE = 11.42832319482264
Predict: 100%|██████████| 6/6 [00:11<00:00, 1.93s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 22.975860527492177 full average RMSE = 37.23428585208286 Average WMAPE for 2237 days: 10.552509729559628 full average WMAPE = 10.552509729559628
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.17s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 10.289673670093876 full average RMSE = 14.910941100615846 Average WMAPE for 2237 days: 11.158224831105398 full average WMAPE = 11.1582248311054
Predict: 100%|██████████| 6/6 [00:10<00:00, 1.67s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 14.448368350469378 full average RMSE = 20.814537490495137 Average WMAPE for 2237 days: 10.306013984021074 full average WMAPE = 10.306013984021074
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.27 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 0.4072418417523469 full average RMSE = 0.8755865267109306 Average WMAPE for 2237 days: 11.02174110188828 full average WMAPE = 11.021741101888281
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.32 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 1.3442109968708091 full average RMSE = 2.037094532662202 Average WMAPE for 2237 days: 20.00023500617681 full average WMAPE = 20.000235006176805
Predict: 100%|██████████| 6/6 [00:07<00:00, 1.21s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 11.481895395619134 full average RMSE = 15.995571023124922 Average WMAPE for 2237 days: 12.574908839859752 full average WMAPE = 12.574908839859752
Predict: 100%|██████████| 6/6 [00:08<00:00, 1.38s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 15.744300402324543 full average RMSE = 25.3475263945568 Average WMAPE for 2237 days: 11.335224034209494 full average WMAPE = 11.335224034209494
Predict: 100%|██████████| 6/6 [00:04<00:00, 1.27 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 4.8994188645507375 full average RMSE = 7.088747087493776 Average WMAPE for 2237 days: 14.443808805807993 full average WMAPE = 14.443808805807995
Predict: 100%|██████████| 6/6 [00:05<00:00, 1.05 batches/s]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.13455520786768 full average RMSE = 13.275290890757999 Average WMAPE for 2237 days: 13.246682700545728 full average WMAPE = 13.246682700545728
Predict: 100%|██████████| 6/6 [00:06<00:00, 1.04s/ batches]
pred_start_idx = 2021-04-24 04:00:00 22551
Average RMSE for 2237 days: 9.842646401430487 full average RMSE = 13.431493092929356 Average WMAPE for 2237 days: 10.502509390565951 full average WMAPE = 10.502509390565951
Out[ ]:
'\nMachine Learning predictions END\n'